Export analysis values

This notebook exports values from the database to a CSV file for further analysis.

Import statements


In [1]:
from datetime import date, time, timedelta, datetime

In [2]:
import pandas
import numpy as np

In [3]:
from nacoustik import sum_decibels

In [4]:
from database.models import Sound
from metrics.models import BiophonyPercent, SoundExposureLevel
from weather.models import Record
from landscape.models import LandCoverArea, NaturalnessArea

Variable declarations


In [5]:
# Destination of the exported dataset (written by the final to_csv cell).
# NOTE(review): machine-specific absolute path — adjust before running elsewhere.
csv_filepath = "/Users/jake/Desktop/dataset.csv"

In [6]:
# Half-width of the weather window: weather values are aggregated from
# `time_offset` before each recording to `time_offset` after it.
time_offset = timedelta(hours=1)

In [7]:
# Leftover interactive check: evaluating the bare name only echoes the imported class.
datetime


Out[7]:
datetime.datetime

In [8]:
# Reference date for week numbering: get_week() measures each recording's
# date relative to this day.
start_datetime = datetime(year=2016, month=1, day=27)

In [9]:
#included_area = '500m'

Function declarations


In [10]:
def calculate_average_wind_speed(start_offset, end_offset):
    """Return the mean wind speed of the weather records inside a time window.

    Args:
        start_offset: datetime marking the start of the window (inclusive).
        end_offset: datetime marking the end of the window (inclusive).

    Returns:
        The average of ``Record.wind_speed`` over the window, formatted as a
        string with one decimal place, or ``None`` when no weather record
        falls inside the window.

    NOTE(review): ``Avg`` is not imported in the visible notebook cells;
    presumably the kernel environment provides it — confirm.
    """
    start_date, start_time = start_offset.date(), start_offset.time()
    end_date, end_time = end_offset.date(), end_offset.time()

    if start_date >= end_date:
        # window inside a single day: date and time can be filtered independently
        records = Record.objects.filter(date__range=(start_date, end_date),
                                        time__range=(start_time, end_time))
    else:
        # The window crosses midnight. Filtering the time column on its own
        # would ask for start_time <= time <= end_time with start_time later
        # than end_time, which matches nothing, so select day by day instead.
        records = (Record.objects.filter(date=start_date, time__gte=start_time)
                   | Record.objects.filter(date__gt=start_date, date__lt=end_date)
                   | Record.objects.filter(date=end_date, time__lte=end_time))

    average = records.aggregate(Avg('wind_speed'))['wind_speed__avg']
    # the aggregate is None when no record matched; formatting None would raise
    if average is None:
        return None
    return "{0:.1f}".format(average)

In [11]:
def calculate_average_temperature(start_offset, end_offset):
    """Return the mean temperature of the weather records inside a time window.

    Args:
        start_offset: datetime marking the start of the window (inclusive).
        end_offset: datetime marking the end of the window (inclusive).

    Returns:
        The average of ``Record.temperature`` over the window, formatted as a
        string with one decimal place, or ``None`` when no weather record
        falls inside the window.

    NOTE(review): ``Avg`` is not imported in the visible notebook cells;
    presumably the kernel environment provides it — confirm.
    """
    start_date, start_time = start_offset.date(), start_offset.time()
    end_date, end_time = end_offset.date(), end_offset.time()

    if start_date >= end_date:
        # window inside a single day: date and time can be filtered independently
        records = Record.objects.filter(date__range=(start_date, end_date),
                                        time__range=(start_time, end_time))
    else:
        # The window crosses midnight. Filtering the time column on its own
        # would ask for start_time <= time <= end_time with start_time later
        # than end_time, which matches nothing, so select day by day instead.
        records = (Record.objects.filter(date=start_date, time__gte=start_time)
                   | Record.objects.filter(date__gt=start_date, date__lt=end_date)
                   | Record.objects.filter(date=end_date, time__lte=end_time))

    average = records.aggregate(Avg('temperature'))['temperature__avg']
    # the aggregate is None when no record matched; formatting None would raise
    if average is None:
        return None
    return "{0:.1f}".format(average)

In [12]:
def calculate_average_precipitation(start_offset, end_offset):
    """Return the total precipitation of the weather records inside a time window.

    Despite the function name, the value is the *sum* of
    ``Record.precipitation`` over the window, not its average.

    Args:
        start_offset: datetime marking the start of the window (inclusive).
        end_offset: datetime marking the end of the window (inclusive).

    Returns:
        The summed precipitation formatted as a string with one decimal
        place, or ``None`` when no weather record falls inside the window.

    NOTE(review): ``Sum`` is not imported in the visible notebook cells;
    presumably the kernel environment provides it — confirm.
    """
    start_date, start_time = start_offset.date(), start_offset.time()
    end_date, end_time = end_offset.date(), end_offset.time()

    if start_date >= end_date:
        # window inside a single day: date and time can be filtered independently
        records = Record.objects.filter(date__range=(start_date, end_date),
                                        time__range=(start_time, end_time))
    else:
        # The window crosses midnight. Filtering the time column on its own
        # would ask for start_time <= time <= end_time with start_time later
        # than end_time, which matches nothing, so select day by day instead.
        records = (Record.objects.filter(date=start_date, time__gte=start_time)
                   | Record.objects.filter(date__gt=start_date, date__lt=end_date)
                   | Record.objects.filter(date=end_date, time__lte=end_time))

    total = records.aggregate(Sum('precipitation'))['precipitation__sum']
    # the aggregate is None when no record matched; formatting None would raise
    if total is None:
        return None
    return "{0:.1f}".format(total)

In [13]:
def calculate_average_pressure(start_offset, end_offset):
    """Return the mean pressure of the weather records inside a time window.

    Args:
        start_offset: datetime marking the start of the window (inclusive).
        end_offset: datetime marking the end of the window (inclusive).

    Returns:
        The average of ``Record.pressure`` over the window, formatted as a
        string with one decimal place, or ``None`` when no weather record
        falls inside the window.

    NOTE(review): ``Avg`` is not imported in the visible notebook cells;
    presumably the kernel environment provides it — confirm.
    """
    start_date, start_time = start_offset.date(), start_offset.time()
    end_date, end_time = end_offset.date(), end_offset.time()

    if start_date >= end_date:
        # window inside a single day: date and time can be filtered independently
        records = Record.objects.filter(date__range=(start_date, end_date),
                                        time__range=(start_time, end_time))
    else:
        # The window crosses midnight. Filtering the time column on its own
        # would ask for start_time <= time <= end_time with start_time later
        # than end_time, which matches nothing, so select day by day instead.
        records = (Record.objects.filter(date=start_date, time__gte=start_time)
                   | Record.objects.filter(date__gt=start_date, date__lt=end_date)
                   | Record.objects.filter(date=end_date, time__lte=end_time))

    average = records.aggregate(Avg('pressure'))['pressure__avg']
    # the aggregate is None when no record matched; formatting None would raise
    if average is None:
        return None
    return "{0:.1f}".format(average)

In [14]:
def get_landcover_areas(site, included_area, cover_type):
    """Return one land-cover value for a site and buffer.

    Args:
        site: value matched against ``LandCoverArea.site``.
        included_area: value matched against ``LandCoverArea.included_area``
            (the export loop passes labels such as '50m').
        cover_type: name of the attribute to read from the matching record
            (the export loop passes names such as 'type_1').

    Returns:
        The value of the ``cover_type`` attribute of the matching record.

    Raises:
        AttributeError: if the record has no attribute named ``cover_type``.
        Whatever ``get()`` raises when the lookup does not match exactly one
        record is propagated unchanged.
    """
    area = LandCoverArea.objects.filter(site__exact=site, included_area__exact=included_area).get()
    # plain attribute lookup instead of eval(): identical result for attribute
    # names, but cover_type can never be executed as arbitrary code
    return getattr(area, cover_type)

In [15]:
def get_naturalness_areas(site, included_area, cover_type):
    """Return one naturalness value for a site and buffer.

    Args:
        site: value matched against ``NaturalnessArea.site``.
        included_area: value matched against ``NaturalnessArea.included_area``.
        cover_type: name of the attribute to read from the matching record.

    Returns:
        The value of the ``cover_type`` attribute of the matching record.

    Raises:
        AttributeError: if the record has no attribute named ``cover_type``.
        Whatever ``get()`` raises when the lookup does not match exactly one
        record is propagated unchanged.
    """
    area = NaturalnessArea.objects.filter(site__exact=site, included_area__exact=included_area).get()
    # plain attribute lookup instead of eval(): identical result for attribute
    # names, but cover_type can never be executed as arbitrary code
    return getattr(area, cover_type)

In [16]:
def calculate_sel(sound):
    """Combine the stored sound exposure levels of a sound into two totals.

    The ``sel`` field of the sound's ``SoundExposureLevel`` record is parsed
    as a Python literal sequence; its first two entries are combined with
    ``sum_decibels`` into the anthrophony value and entries 2 through 9 into
    the biophony value.

    Args:
        sound: the sound whose ``SoundExposureLevel`` record is looked up.

    Returns:
        A tuple ``(anthrophony, biophony)``; ``(0., 0.)`` when the sound has
        no ``SoundExposureLevel`` record.
    """
    import ast  # local import keeps this cell self-contained

    try:
        sel_record = SoundExposureLevel.objects.get(sound=sound)
    except SoundExposureLevel.DoesNotExist:
        return 0., 0.

    # Parse the stored text once, and only as a literal: eval() would execute
    # whatever expression happens to be stored in the database.
    levels = ast.literal_eval(sel_record.sel)
    anthrophony = sum_decibels(np.array(levels[0:2]).astype(np.float64))
    biophony = sum_decibels(np.array(levels[2:10]).astype(np.float64))
    return anthrophony, biophony

In [17]:
def get_ndsi(sound):
    """Return the sum of the left and right NDSI values stored for a sound.

    NOTE(review): ``NDSI`` is not imported in the visible notebook cells —
    confirm it is available before calling this function.
    """
    record = NDSI.objects.get(sound=sound)
    left, right = record.ndsi_left, record.ndsi_right
    return left + right

In [18]:
def get_aci(sound):
    """Return the sum of the left and right ACI values stored for a sound.

    NOTE(review): ``AcousticComplexityIndex`` is not imported in the visible
    notebook cells — confirm it is available before calling this function.
    """
    record = AcousticComplexityIndex.objects.get(sound=sound)
    left, right = record.aci_left, record.aci_right
    return left + right

In [19]:
def get_bai(sound):
    """Return the sum of the left and right BAI values stored for a sound.

    NOTE(review): ``BioacousticIndex`` is not imported in the visible
    notebook cells — confirm it is available before calling this function.
    """
    record = BioacousticIndex.objects.get(sound=sound)
    left, right = record.bai_left, record.bai_right
    return left + right

In [20]:
def get_pss(sound):
    """Return the power spectrum sums stored for a sound.

    Returns:
        A tuple ``(anthrophony, biophony, total)`` read from the sound's
        ``PowerSpectrumSum`` record.

    NOTE(review): ``PowerSpectrumSum`` is not imported in the visible
    notebook cells — confirm it is available before calling this function.
    """
    record = PowerSpectrumSum.objects.get(sound=sound)
    return record.anthrophony, record.biophony, record.total

In [21]:
def get_sel(sound):
    """Return the stored sound exposure level values for a sound.

    Returns:
        A tuple ``(sel, anthrophony, biophony)`` read from the sound's
        ``SoundExposureLevel`` record.
    """
    sel_record = SoundExposureLevel.objects.get(sound=sound)
    return sel_record.sel, sel_record.anthrophony, sel_record.biophony

In [22]:
def get_biophony_percent(sound):
    """Return the ``biophony_percent`` value of the sound's BiophonyPercent record."""
    return BiophonyPercent.objects.get(sound=sound).biophony_percent

In [23]:
def get_week(sound):
    """Return the index of the week in which a sound was recorded.

    Weeks are counted in whole 7-day steps from ``start_datetime``: a sound
    recorded during the first seven days is in week 0, the next seven days
    are week 1, and so on. Dates before ``start_datetime`` give negative
    indices (floor division).

    Args:
        sound: object whose ``date`` attribute is the recording date.

    Returns:
        The week index as an integer.
    """
    days_elapsed = (sound.date - start_datetime.date()).days
    # floor division: `/` yields fractional weeks under Python 3
    return days_elapsed // 7

Pull values


In [24]:
#sounds = Sound.objects.filter(quality__exact = 1)

In [25]:
# Recordings to export: every Sound whose quality falls in the range (0, 1).
sounds = Sound.objects.filter(quality__range = (0, 1))

In [26]:
# Empty table with one column per exported value; the export loop fills it
# with one row per sound.
values = pandas.DataFrame(columns=['sound', 'site', 'site_name',
                                   'sel', 'sel_anthrophony', 'sel_biophony',
                                   'biophony', 'week',

                                   # land-cover percentages within each buffer
                                   'building_50m', 'pavement_50m',
                                   'forest_50m', 'field_50m',

                                   'building_100m', 'pavement_100m',
                                   'forest_100m', 'field_100m',

                                   'building_200m', 'pavement_200m',
                                   'forest_200m', 'field_200m',

                                   'building_500m', 'pavement_500m',
                                   'forest_500m', 'field_500m',

                                   # weather around the recording time
                                   'temperature', 'wind_speed',
                                   'precipitation', 'pressure'])


In [27]:
# Exported land-cover classes: output column prefix -> LandCoverArea attribute
land_cover_types = (('building', 'type_1'), ('pavement', 'type_2'),
                    ('forest', 'type_9'), ('field', 'type_12'))

# Buffer label -> divisor used to express a land-cover area as a percentage
# (presumably the total area of the buffer, in the units of the stored
# areas — TODO confirm)
buffer_areas = (('50m', 7756.657171), ('100m', 31026.6287),
                ('200m', 124106.515), ('500m', 775665.7171))

# Column order of the exported table; mirrors the order in which each row is filled
row_columns = (['sound', 'site', 'site_name',
                'sel', 'sel_anthrophony', 'sel_biophony',
                'biophony', 'week']
               + ['{0}_{1}'.format(cover, radius)
                  for radius, _ in buffer_areas
                  for cover, _ in land_cover_types]
               + ['temperature', 'wind_speed', 'precipitation', 'pressure'])

# Rows are collected in a list and converted once: appending to a dataframe
# inside the loop copies the whole table on every iteration, and
# DataFrame.append no longer exists in pandas 2.
sound_rows = []

for sound in sounds:

    # initialize dict to hold row values
    sound_row = dict()

    # compute time offsets for weather data calculations
    recorded_datetime = datetime(sound.date.year, sound.date.month, sound.date.day,
                                 sound.time.hour, sound.time.minute, sound.time.second)
    start_offset = recorded_datetime - time_offset
    end_offset = recorded_datetime + time_offset

    # identification
    sound_row['sound'] = sound.id
    sound_row['site'] = sound.site.id
    sound_row['site_name'] = sound.site.name

    # acoustic metrics
    sound_row['sel'], sound_row['sel_anthrophony'], sound_row['sel_biophony'] = get_sel(sound)
    sound_row['biophony'] = get_biophony_percent(sound)
    sound_row['week'] = get_week(sound)

    # land cover: share of each cover class within each buffer, in percent
    for radius, buffer_area in buffer_areas:
        for cover, cover_type in land_cover_types:
            column = '{0}_{1}'.format(cover, radius)
            sound_row[column] = (get_landcover_areas(sound.site, radius, cover_type) / buffer_area) * 100

    # weather around the recording time
    sound_row['temperature'] = calculate_average_temperature(start_offset, end_offset)
    sound_row['wind_speed'] = calculate_average_wind_speed(start_offset, end_offset)
    sound_row['precipitation'] = calculate_average_precipitation(start_offset, end_offset)
    sound_row['pressure'] = calculate_average_pressure(start_offset, end_offset)

    sound_rows.append(sound_row)

# build the dataframe in one step; `columns` keeps the schema even when no sound matched
values = pandas.DataFrame(sound_rows, columns = row_columns)

# clean up dataframe (np.int was only an alias of the builtin int and has been removed from NumPy)
values['sound'] = values['sound'].astype(int)
values['site'] = values['site'].astype(int)
values = values.set_index('sound')

Join roads data

load csv


In [28]:
# Road data, one row per site; indexed by 'site' so it can be joined
# onto the exported values.
# NOTE(review): machine-specific absolute path.
filepath = "/Users/jake/OneDrive/Documents/innsbruck_dataset/distanceroads.csv"
roads_table = pandas.read_csv(filepath)
roads_data = roads_table.set_index('site')

join


In [29]:
# Attach the road columns to every row by matching its 'site' value against the index of roads_data.
values = values.join(roads_data, on='site')

Join D2N values

load csv


In [30]:
# D2N values, one row per site; indexed by 'site' so they can be joined
# onto the exported values.
# NOTE(review): machine-specific absolute path.
filepath = "/Users/jake/OneDrive/Documents/alpine soundscapes/data/average_d2n.csv"
d2n_table = pandas.read_csv(filepath)
D2N_data = d2n_table.set_index('site')

join


In [31]:
# Attach the D2N columns to every row by matching its 'site' value against the index of D2N_data.
values = values.join(D2N_data, on='site')

Export data to csv


In [32]:
# Write the assembled table to the path configured in csv_filepath.
values.to_csv(csv_filepath)